body .main-container {
max-width: 100% !important;
width: 100% !important;
}
body {
max-width: 100% !important;
}
body, td {
font-size: 16px;
}
code.r{
font-size: 14px;
}
pre {
font-size: 14px
}Let us set some global options for all code chunks in this document.
knitr::opts_chunk$set(
message = FALSE, # Disable messages printed by R code chunks
warning = FALSE, # Disable warnings printed by R code chunks
echo = TRUE, # Show R code within code chunks in output
include = TRUE, # Include both R code and its results in output
eval = FALSE, # Evaluate R code chunks
cache = TRUE, # Enable caching of R code chunks for faster rendering
fig.align = "center",
out.width = "100%"
)
#set.seed(1982)library(ggplot2)
library(gridExtra)
library(dplyr)
library(sf)
library(rmarkdown)
library(here)We show all the scripts used to build the graph and generate the data up to this point. None of the scripts is actually run. They are just shown exactly how they were initially written. Some of the chunks are executed just to show some features of the objects.
This file is named
Preprocessing/16Process_raw_data_with_ID.R and takes the
original data (called Speed_Observations_SF.csv and
obtained from this official
website) and filters it as showed below.
It produces
Data_files/january_with_ID.RData.
library(dplyr)
library(sf)
# bus numbers according to Wikipedia
busesnumber = c(8501:8530, 8531:8560, 8601:8662, 8701:8750, 8751:8780,
8800:8969, 6500:6554, 6560:6697, 6700:6730, 5701:5885, 7201:7293)
# read the raw data (which is not on the tracked repository)
raw = read.csv("~/Desktop/Spring 2024/Speed_Observations_SF.csv")
# get just day, hour, and speed in kph
january = raw %>%
filter(Vehicle.ID %in% busesnumber) %>% # to choose only buses
filter(Latitude > 37.7, Latitude < 37.815, Longitude > -122.52, Longitude < -122.36) %>% # to remove some weird location in front of Gabon in Africa
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>% # to transform it as an sf object
mutate(datetime = strptime(Position.Date.Time, format="%m/%d/%Y %I:%M:%S %p")) %>% # to format so we can manipulate later
mutate(day = as.integer(format(datetime, format = "%d")), hour = as.integer(format(datetime, format = "%H"))) %>% # to get day and hour
filter(Average.Speed < 73) %>% # to remove some atypical speed observations. 73 because we are allowing 10kph above the limit
mutate(Average.Speed = Average.Speed*1.60934) %>% # to transform from mph to kph
select(-Heading) %>% # to remove variable Position.Date.Time and Heading
rename(speed = Average.Speed, ID = Vehicle.ID, PDT = Position.Date.Time) # to rename Average.Speed to speed so it is easier to write
save(january, file = "Data_files/january_with_ID.RData")This file is named
Preprocessing/19Remove_zero_speed_for_long_periods.R and
takes Data_files/january_with_ID.RData and removes
consecutive zero speed observations (when the bus has stopped for a long
period).
It produces
Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData.
library(MetricGraph)
library(dplyr)
library(TSstudio)
library(ggplot2)
library(gganimate)
library(tsbox)
remove_consecutive_zeros <- function(vec) {
# Initialize a result vector
result <- numeric(length(vec))
# Index for the result vector
index <- 1
# Flag to track if the first zero has been encountered
first_zero <- FALSE
# Loop through the original vector
for (i in 1:length(vec)) {
# If current value is not zero or previous value is not zero, or it's the first zero, add it to result
if (vec[i] != 0 || (i > 1 && vec[i - 1] != 0) || i == 1) {
result[index] <- vec[i]
index <- index + 1
# Reset first_zero flag if it's the first zero
if (vec[i] == 0 && !first_zero) {
first_zero <- TRUE
}
} else {
# Replace consecutive zeros with NA after the first zero
result[index] <- NA
index <- index + 1
}
}
# Trim the result vector to remove unused entries
result <- result[1:(index - 1)]
return(result)
}
# loading the data
load("~/Desktop/Spring 2024/january_with_ID.RData")
# just filtering
aux = january %>%
filter(day %in% c(7,14,21,28), hour %in% c(13)) %>% # every Thursday of January 2021
mutate(day = day/7) %>%
dplyr::select(-PDT, -hour)
#distinct(geometry, .keep_all = TRUE) # to remove observations with the same location
buses_ID = unique(aux$ID)
days = 1:4
df = aux %>%
filter(ID == buses_ID[1], day == 1) %>%
arrange(datetime) %>%
mutate(speed = remove_consecutive_zeros(speed)) %>%
drop_na(speed)
for (i in 2:length(buses_ID)) {
for (j in 1:length(days)) {
check = aux %>% filter(ID == buses_ID[i], day == days[j])
if(nrow(check) > 0){
tmp = check %>%
arrange(datetime) %>%
mutate(speed = remove_consecutive_zeros(speed)) %>%
drop_na(speed)
df = rbind(tmp, df)
}
}
}
save(df, file = "Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData")load(here("Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData"))
df |> head(5) |> paged_table()df |> dim()## [1] 149575 5
ggplot() + geom_sf(data = df, aes(color = speed)) + theme_minimal()Speed observations
This file is named
Preprocessing/9Creates_tomtom_dataset_from_shapefile_and_json.R
and extracts data from both a .shp and a .json
files to create an sf object that we use later to build the
graph and get the value of some covariates.
It produces Data_files/tomtom.RData.
library(sf)
library(mapview)
library(jsonlite)
library(dplyr)
#tomtom_geojson = st_read("Data_files/San_Francisco_data_from_TomTom.geojson")
# read shape file
tomtom_sph = st_read("Data_files/San_Francisco_data_from_TomTom/network.shp")
# read json file and select the information of interest
tomtom_json = fromJSON("Data_files/San_Francisco_data_from_TomTom.json")$network$segmentResults$segmentTimeResults
# Initialize empty data frames for 10 and 12 columns
df_10_columns <- data.frame()
df_12_columns <- data.frame()
# Iterate through the list of data frames
for (i in seq_along(tomtom_json)) {
# Check the number of columns in the current data frame
num_cols <- ncol(tomtom_json[[i]])
# Add list number column
tomtom_json[[i]]$List_Number <- i
# Append to the appropriate data frame based on the number of columns
if (num_cols == 10) {
df_10_columns <- rbind(df_10_columns, tomtom_json[[i]])
} else if (num_cols == 12) {
df_12_columns <- rbind(df_12_columns, tomtom_json[[i]])
}
}
# add two columns with NA values so that we can rbin later
from_10_to_12 = df_10_columns %>% mutate(standardDeviationSpeed = NA, travelTimeStandardDeviation = NA)
# rbind and order by List_Number
almost_tomtom = rbind(from_10_to_12, df_12_columns) %>% arrange(List_Number)
# join shape file and the dataset we get from the above process
casi_tomtom = bind_cols(tomtom_sph, almost_tomtom) %>%
mutate(FRC = as.character(FRC))
PERCENTILES = do.call(rbind, casi_tomtom$speedPercentiles) %>% as.data.frame()
names(PERCENTILES) = paste(seq(5, 95, by = 5), "percentile", sep = "")
tomtom = bind_cols(casi_tomtom, PERCENTILES) %>% dplyr::select(-speedPercentiles)
# save the obtained data set
save(tomtom, file = "Data_files/tomtom.RData")load(here("Data_files/tomtom.RData"))
tomtom |> head(5) |> paged_table()tomtom |> dim()## [1] 67199 39
ggplot() + geom_sf(data = tomtom, aes(color = SpeedLimit)) + theme_minimal()Speed Limits
This file is named
Preprocessing/28Window.case.file1.graph.builder.R and
takes
Data_files/tomtom.RData
Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData.
It defines polygon to cut the data and the
network.
Then it builds the graph and stores it as
Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData.
It also processes the data into graph-data and saves it as
Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData.
library(sf)
library(dplyr)
library(tidyr)
library(mapview)
library(MetricGraph)
# load network from tomtom
load("Data_files/tomtom.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData") # just to get the crs
################################################################################
# build polygon to cut the network and the data #
################################################################################
# to define a polygon to cut the network and the data
polygon = st_multipoint(c(st_point(c(-122.42591, 37.80732)),
st_point(c(-122.40648, 37.81002)),
st_point(c(-122.38820, 37.78959)),
st_point(c(-122.38770, 37.78849)),
st_point(c(-122.38483, 37.76713)),
st_point(c(-122.39342, 37.76665)),
st_point(c(-122.39372, 37.76655)),
st_point(c(-122.41970, 37.76500)),
st_point(c(-122.42039, 37.77159)),
st_point(c(-122.42050, 37.77174)),
st_point(c(-122.42040, 37.77194)),
st_point(c(-122.41905, 37.77306)))) %>%
st_cast("POLYGON") %>%
st_sfc(crs = st_crs(df))
from.tomtom = tomtom %>%
dplyr::select(-Id, -Segment.Id, -NewSegId, -timeSet, -dateRange, -standardDeviationSpeed, -travelTimeStandardDeviation) %>%
filter(FRC != "7") %>%
mutate(value = SpeedLimit, road_type = paste("class_", FRC, sep = ""), aux = paste("class_", FRC, sep = "")) %>%
pivot_wider(names_from = aux, values_from = value, values_fill = list(value = 0)) %>%
mutate(upto1 = class_0 + class_1) %>%
mutate(upto3 = upto1 + class_3) %>%
mutate(upto4 = upto3 + class_4) %>%
mutate(upto5 = upto4 + class_5) %>%
mutate(upto6 = upto5 + class_6) %>%
mutate(Length = Length/1000) %>% # in km
mutate(density = sampleSize/Length) %>% # density whole day
mutate(density_per_hour = density/24) %>% # density per hour
st_transform(crs = st_crs(df)) %>%
st_filter(x = ., y = polygon, .predicate = st_within)
# get the weights and edges
weights = from.tomtom %>% st_drop_geometry()
edges = from.tomtom$geometry
# building the graph
graph = graph_components$new(edges = edges, which_longlat = "sf", longlat = TRUE, edge_weights = weights)
# getting the largest connected component
sf_graph = graph$get_largest()
################################################################################
# save the graph #
################################################################################
save(sf_graph, file = "Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
################################################################################
# process the data #
################################################################################
data.reduced = st_filter(x = df, y = polygon, .predicate = st_within)
sf_graph$add_observations(data = data.reduced, group = "day", tolerance = 0.02, duplicated_strategy = "jitter") # tolerance = 40m
# getting the data from the graph (that is, in graph coordinates)
data_on_graph = sf_graph$get_data()
################################################################################
# save the data #
################################################################################
save(data_on_graph, file = "Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData")load(here("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData"))
data_on_graph |> head(5) |> paged_table()data_on_graph |> dim()## [1] 39575 10
load("~/Desktop/Spring 2024/Sanfrancisco_data_analysis/Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
sf_graph$plot(vertex_size = 0,
edge_width = 1,
edge_weight = "SpeedLimit",
edge_width_weight = "SpeedLimit",
edge_color = "SpeedLimit",
add_new_scale_weights = FALSE)Speed Limits
Preprocessing/5Traffic_stop_inclussion.R and downloads data
from OSM and saves it as
Networks object/traffic_stop.RData.library(mapview)
library(dplyr)
library(plotly)
library(osmdata)
library(sf)
bb = c(-122.53000, 37.69702, -122.37000 , 37.82600) #c(-122.51362, 37.69702, -122.35779 , 37.81130)
data = opq(bbox = bb) %>%
add_osm_feature(key="highway", value=c("traffic_signals", "bus_stop", "stop", "crossing")) %>%
osmdata_sf()
datapoints<- data$osm_points%>%
filter(highway %in% c("traffic_signals", "bus_stop", "stop", "crossing")) %>%
select(geometry, highway)
save(datapoints, file = "Networks object/traffic_stop.RData")load(here("Networks object/traffic_stop.RData"))
datapoints |> head(5) |> paged_table()datapoints |> dim()## [1] 21733 2
ggplot() + geom_sf(data = datapoints, aes(color = highway)) + theme_minimal()Traffic signs
datapoints$highway |> table() |> as.data.frame() |> paged_table()This file is named
Preprocessing/28Window.case.file2.stop_signsprocessing.R
and takes
Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData
Networks object/traffic_stop.RData
Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData.
It processes the traffic signs information into graph format and
saves it as
Data_files/stops_data_on_graph_partialtomtom.RData.
library(MetricGraph)
library(dplyr)
library(tidyr)
library(sf)
library(plotly)
load("Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
load("Networks object/traffic_stop.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros.RData") # just to get the crs
################################################################################
# build polygon to cut the network and the data #
################################################################################
# to define a polygon to cut the network and the data
polygon = st_multipoint(c(st_point(c(-122.42591, 37.80732)),
st_point(c(-122.40648, 37.81002)),
st_point(c(-122.38820, 37.78959)),
st_point(c(-122.38770, 37.78849)),
st_point(c(-122.38483, 37.76713)),
st_point(c(-122.39342, 37.76665)),
st_point(c(-122.39372, 37.76655)),
st_point(c(-122.41970, 37.76500)),
st_point(c(-122.42039, 37.77159)),
st_point(c(-122.42050, 37.77174)),
st_point(c(-122.42040, 37.77194)),
st_point(c(-122.41905, 37.77306)))) %>%
st_cast("POLYGON") %>%
st_sfc(crs = st_crs(df))
datapoints.reduced = datapoints %>% st_transform(crs = st_crs(df)) %>% st_filter(x = ., y = polygon, .predicate = st_within)
stops_obs <- sf_graph$add_observations(data = datapoints.reduced, tolerance = 0.003, clear_obs = TRUE) # tolerance 7m
traffic_stops = sf_graph$get_data()
save(traffic_stops, file="Data_files/stops_data_on_graph_partialtomtom.RData")load(here("Data_files/stops_data_on_graph_partialtomtom.RData"))
traffic_stops |> head(5) |> paged_table()traffic_stops |> dim()## [1] 2926 7
ggplot() + geom_point(data = traffic_stops, aes(x = .coord_x, y = .coord_y, color = highway)) + theme_minimal()Traffic signs on graph
traffic_stops$highway |> table() |> as.data.frame() |> paged_table()This file is named
Preprocessing/28Window.case.file3.addcovariates.R and
takes
Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData
Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData
Data_files/stops_data_on_graph_partialtomtom.RData.
It produces
Data_files/data_on_graph_with_covariates_no_consecutive_zeros_partialtomtom.RData.
library(MetricGraph)
library(dplyr)
# loading the data
load("Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData")
load("Data_files/stops_data_on_graph_partialtomtom.RData")
to_remove = data_on_graph %>% filter(speed == 0, .distance_to_graph > 0.001) # removing zero speed observations that are 5m or more away from the graph
data_on_graph = setdiff(data_on_graph, to_remove) %>% filter(.distance_to_graph <= 0.003)
# length of each edge
edge_length = sf_graph$get_edge_lengths()
# matrix of vertices, the vertices in the i-th are the vertices that define edge i
vertex_matrix = sf_graph$E
sf_graph$add_observations(data = data_on_graph, group = "day", clear_obs = TRUE)
sf_graph$edgeweight_to_data(data_loc = TRUE)
data_on_graph = sf_graph$get_data() %>% drop_na(-StreetName) # this drops all rows with at least one NA value but without taking into account StreetName
# getting the edge_number and distance_on_edge columns from data
data_on_graph_red = data.frame(edge_number = data_on_graph$.edge_number,
distance_on_edge = data_on_graph$.distance_on_edge)
# defining function that decays from 1
fun = function(x){return(exp(-((x/0.05)^2)))}
# ------------------------------------------------------------------------------------------------------------------
# For bus_stop
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "bus_stop") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.1
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final1 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
# ------------------------------------------------------------------------------------------------------------------
# For traffic_signals
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "traffic_signals") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.05
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final2 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
# ------------------------------------------------------------------------------------------------------------------
# For stop
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "stop") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.05
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final3 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
# ------------------------------------------------------------------------------------------------------------------
# For crossing
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "crossing") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.05
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final4 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
cov_obs <- data_on_graph[[".distance_on_edge"]]
cov_obs <- 2 * ifelse(cov_obs > 0.5, 1 - cov_obs, cov_obs)
data_on_graph_with_covariates = data_on_graph %>% mutate(bus = final1$final_cov,
bus_number = final1$cov_number_sign,
signal = final2$final_cov,
signal_number = final2$cov_number_sign,
stop = final3$final_cov,
stop_number = final3$cov_number_sign,
crossing = final4$final_cov,
crossing_number = final4$cov_number_sign,
cov_obs = cov_obs)
save(data_on_graph_with_covariates, file = "Data_files/data_on_graph_with_covariates_no_consecutive_zeros_partialtomtom.RData")load(here("Data_files/data_on_graph_with_covariates_no_consecutive_zeros_partialtomtom.RData"))
data_on_graph_with_covariates |> head(5) |> paged_table()data_on_graph_with_covariates |> dim()## [1] 14534 64
ggplot() + geom_point(data = data_on_graph_with_covariates, aes(x = .coord_x, y = .coord_y, color = SpeedLimit)) + theme_minimal()Speed Limits on speed observations
This file is named
Preprocessing/28Window.case.file4.addcovariatesonmesh.R and
it defines a function that takes h and uses
Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData
Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData
Data_files/stops_data_on_graph_partialtomtom.RData.
It produces
Data_files/data_on_mesh_with_covariates_partialtomtom.RData.
creates_covariates_on_mesh = function(h){
library(MetricGraph)
library(dplyr)
# loading the data
load("Graph_objects/graph_construction_28_03_2024partialtomtomwhichlonglatsf.RData")
load("Data_files/data_day7142128_hour13_with_no_consecutive_zeros_partialtomtom_graph_processed.RData")
load("Data_files/stops_data_on_graph_partialtomtom.RData")
# length of each edge
edge_length = sf_graph$get_edge_lengths()
# matrix of vertices, the vertices in the i-th are the vertices that define edge i
vertex_matrix = sf_graph$E
sf_graph$add_observations(data = data_on_graph, group = "day", clear_obs = TRUE)
sf_graph$build_mesh(h = h)
data_on_mesh = sf_graph$edgeweight_to_data(mesh = TRUE, add = FALSE, return = TRUE) %>% filter(.group == 1)
data_on_graph = data_on_mesh
# getting the edge_number and distance_on_edge columns from data
data_on_graph_red = data.frame(edge_number = data_on_graph$.edge_number,
distance_on_edge = data_on_graph$.distance_on_edge)
# defining function that decays from 1
fun = function(x){return(exp(-((x/0.05)^2)))}
# ------------------------------------------------------------------------------------------------------------------
# For bus_stop
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "bus_stop") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.1
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final1 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
# ------------------------------------------------------------------------------------------------------------------
# For traffic_signals
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "traffic_signals") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.05
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final2 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
# ------------------------------------------------------------------------------------------------------------------
# For stop
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "stop") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.05
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final3 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
# ------------------------------------------------------------------------------------------------------------------
# For crossing
# ------------------------------------------------------------------------------------------------------------------
# getting the sign_data (this could be bus_stop, traffic_signals, stop or crossing)
sign_data = traffic_stops %>%
filter(highway == "crossing") %>%
as.data.frame() %>%
dplyr::select(.edge_number, .distance_on_edge) %>%
rename(edge_number = .edge_number, distance_on_edge = .distance_on_edge)
# initializing a vector to store the edges numbers from sign_data (they are not necessarily unique)
edges = c()
borderline = 0.05
# looping over the signs
list_for_indices = list()
list_for_indices[[(sf_graph$nE + 1)]] = "tmp1"
list_for_max_values = list()
list_for_max_values[[(sf_graph$nE + 1)]] = "tmp2"
# ----------------------------------------------------------------------------------------------
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
original_distance = as.numeric(edge_length[edge_number])
center_sign = original_distance*distance_on_edge
vertices = vertex_matrix[edge_number,]
if(center_sign <= borderline){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[1])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -center_sign
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[1])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + center_sign
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
}
}
}
if(center_sign >= (original_distance- borderline)){
# going
going_edges = which(vertex_matrix[,1] %in% vertices[2])
if(length(going_edges) > 0){
for (i in 1:length(going_edges)) {
center_going = -(original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == going_edges[i])
list_for_indices[[going_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[going_edges[i]])
eval = matrix(fun(aux - center_going), ncol = 1)
list_for_max_values[[going_edges[i]]] = apply(cbind(eval, list_for_max_values[[going_edges[i]]]) , 1, max)
}
}
# coming
coming_edges = which(vertex_matrix[,2] %in% vertices[2])
if(length(coming_edges) > 0){
for (i in 1:length(coming_edges)) {
center_coming = as.numeric(edge_length[coming_edges[i]]) + (original_distance - center_sign)
idx = which(data_on_graph_red$edge_number == coming_edges[i])
list_for_indices[[coming_edges[i]]] = idx
aux = data_on_graph_red$distance_on_edge[idx]*as.numeric(edge_length[coming_edges[i]])
eval = matrix(fun(aux - center_coming), ncol = 1)
list_for_max_values[[coming_edges[i]]] = apply(cbind(eval, list_for_max_values[[coming_edges[i]]]) , 1, max)
} # end for
}# end if
} # end else
}
#----------------------------------------------------------------------------------------------------
# we compute covariate at the points on each edge just taking into account only signs in the same edge
for (k in 1:nrow(sign_data)) {
# getting the sign at iteration i
sign = sign_data[k,]
# getting the edge that contains the sign
edge_number = sign[[1]]
# storing the edge_number into edges vector
edges[k] = edge_number
# getting the distance on the edge of the sign
distance_on_edge = sign[[2]]
# indices of the data that has edge numbers equal to edge_number (in practice, getting the identity of the point the edge with number edge_number)
idx = which(data_on_graph_red$edge_number == edge_number)
list_for_indices[[edge_number]] = cbind(idx, list_for_indices[[edge_number]]) # to repeat the columns
# getting the distance of the points in the edge of interest
aux = data_on_graph_red$distance_on_edge[idx]
# computing the value of the covariate (with normalized and no normalized distance)
eval = matrix(fun(as.numeric(edge_length[edge_number])*(aux - distance_on_edge)), ncol = 1)
list_for_max_values[[edge_number]] = apply(cbind(eval, list_for_max_values[[edge_number]]) , 1, max)
}
list_for_indices[[(sf_graph$nE + 1)]] = NULL
list_for_max_values[[(sf_graph$nE + 1)]] = NULL
# drop NULL members
filtered_max_values = list_for_max_values[!sapply(list_for_max_values, is.null)]
filtered_indices = list_for_indices[!sapply(list_for_indices, is.null)]
new_filtered_max_values = filtered_max_values[sapply(filtered_max_values, function(x) length(x) > 0)]
new_filtered_indices = filtered_indices[sapply(filtered_indices, function(x) length(x) > 0)]
# Creating number of signs covariate ----------------------------------------------------------
cov_number_sign = rep(0, nrow(data_on_graph_red))
for (i in 1:length(new_filtered_indices)) {
if(length(dim(new_filtered_indices[[i]])) == 2){ # that is, if it is a matrix
cov_number_sign[new_filtered_indices[[i]][,1]] = ncol(new_filtered_indices[[i]])
new_filtered_indices[[i]] = as.vector(new_filtered_indices[[i]][,1]) # just getting one column
}
}
# -------------------------------------------------------------------------------------------------
covariate_alltogether = data.frame(all_indices = unlist(new_filtered_indices),
all_max_values = unlist(new_filtered_max_values))
final_cov = rep(0, nrow(data_on_graph_red))
final_cov[covariate_alltogether$all_indices] = covariate_alltogether$all_max_values
final4 = data_on_graph_red %>% mutate(final_cov = final_cov, cov_number_sign = cov_number_sign)
cov_obs <- data_on_graph[[".distance_on_edge"]]
cov_obs <- 2 * ifelse(cov_obs > 0.5, 1 - cov_obs, cov_obs)
data_on_mesh_with_covariates = data_on_graph %>% mutate(bus = final1$final_cov,
bus_number = final1$cov_number_sign,
signal = final2$final_cov,
signal_number = final2$cov_number_sign,
stop = final3$final_cov,
stop_number = final3$cov_number_sign,
crossing = final4$final_cov,
crossing_number = final4$cov_number_sign,
cov_obs = cov_obs)
save(data_on_mesh_with_covariates, file = "Data_files/data_on_mesh_with_covariates_partialtomtom.RData")
}load(here("Data_files/data_on_mesh_with_covariates_partialtomtom.RData"))
data_on_mesh_with_covariates |> head(5) |> paged_table()data_on_mesh_with_covariates |> dim()## [1] 7169 59
ggplot() + geom_point(data = data_on_mesh_with_covariates, aes(x = .coord_x, y = .coord_y, color = SpeedLimit)) + theme_minimal()Speed Limits on mesh nodes